
import pandas as pd


# 读取csv文件
from matplotlib import pyplot as plt

# Load the data set from 'amazon.csv' in the current working directory.
df = pd.read_csv('amazon.csv')
# Column names
print(df.columns)
# Row index
print(df.index)
# Replace missing rating counts with 0. The result is assigned back to the
# column instead of calling fillna(..., inplace=True) on the selection: that
# form is a chained in-place update, which pandas warns about and which does
# not modify the parent DataFrame once Copy-on-Write is enabled.
df['rating_count'] = df['rating_count'].fillna(0)
print(df['rating_count'])

# Drop rows whose 'rating' value is missing.
df = df.dropna(subset=['rating'])
# Drop rows that repeat an earlier (product_id, product_name) pair,
# keeping the first occurrence.
df = df.drop_duplicates(subset=['product_id', 'product_name'], keep='first')

print(df)
# Count how many of the remaining rows carry each distinct rating value and
# draw those counts as a bar chart (bars follow value_counts() order, i.e.
# most frequent first).
x = df['rating']
count = x.value_counts()
count.plot(kind='bar')
plt.show()

